# Project root: normally injected by snakemake's config; hard-coded here so the
# report can also be rendered interactively outside the pipeline.
#project_dir <- snakemake@config$project_dir
project_dir <-  '/project2/mstephens/ktayeb/logistic-susie-snakemake'
knitr::opts_knit$set(root.dir = project_dir)
library(ggcorrplot)
## Loading required package: ggplot2
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(reticulate)
library(tidyr)
library(tictoc)
library(ggplot2)

source('workflow/scripts/plotting/plot_functions.R')

# httpgd::hgd()

# Read one RDS summary file and return its contents as a tibble.
# The path is printed so progress is visible while mapping over many files.
load_tbl <- function(path){
    print(path)
    contents <- readRDS(path)
    dplyr::as_tibble(contents)
}
# gather per-simulation PIP summaries across all X matrices and methods
pip_summary_files <- Sys.glob('results/gsea/*/*_pip_summary.rds')
res_pip <- purrr::map_dfr(pip_summary_files, ~load_tbl(.x))
## [1] "results/gsea/msigdb_c1_5k/gibss_abf_default_pip_summary.rds"
## [1] "results/gsea/msigdb_c1_5k/gibss_abf_L1_pip_summary.rds"
## [1] "results/gsea/msigdb_c1_5k/gibss_labf_default_pip_summary.rds"
## [1] "results/gsea/msigdb_c1_5k/gibss_labf_L1_pip_summary.rds"
## [1] "results/gsea/msigdb_c1_5k/linear_susie_default_pip_summary.rds"
## [1] "results/gsea/msigdb_c1_5k/linear_susie_L1_pip_summary.rds"
## [1] "results/gsea/msigdb_h/gibss_abf_default_pip_summary.rds"
## [1] "results/gsea/msigdb_h/gibss_abf_L1_pip_summary.rds"
## [1] "results/gsea/msigdb_h/gibss_labf_default_pip_summary.rds"
## [1] "results/gsea/msigdb_h/gibss_labf_L1_pip_summary.rds"
## [1] "results/gsea/msigdb_h/linear_susie_default_pip_summary.rds"
## [1] "results/gsea/msigdb_h/linear_susie_L1_pip_summary.rds"
## [1] "results/gsea/X_bin_med/gibss_abf_default_pip_summary.rds"
## [1] "results/gsea/X_bin_med/gibss_abf_L1_pip_summary.rds"
## [1] "results/gsea/X_bin_med/gibss_labf_default_pip_summary.rds"
## [1] "results/gsea/X_bin_med/gibss_labf_L1_pip_summary.rds"
## [1] "results/gsea/X_bin_med/linear_susie_default_pip_summary.rds"
## [1] "results/gsea/X_bin_med/linear_susie_L1_pip_summary.rds"
## [1] "results/gsea/X_bin_strong/gibss_abf_default_pip_summary.rds"
## [1] "results/gsea/X_bin_strong/gibss_abf_L1_pip_summary.rds"
## [1] "results/gsea/X_bin_strong/gibss_labf_default_pip_summary.rds"
## [1] "results/gsea/X_bin_strong/gibss_labf_L1_pip_summary.rds"
## [1] "results/gsea/X_bin_strong/linear_susie_default_pip_summary.rds"
## [1] "results/gsea/X_bin_strong/linear_susie_L1_pip_summary.rds"
## [1] "results/gsea/X_bin_weak/gibss_abf_default_pip_summary.rds"
## [1] "results/gsea/X_bin_weak/gibss_abf_L1_pip_summary.rds"
## [1] "results/gsea/X_bin_weak/gibss_labf_default_pip_summary.rds"
## [1] "results/gsea/X_bin_weak/gibss_labf_L1_pip_summary.rds"
## [1] "results/gsea/X_bin_weak/linear_susie_default_pip_summary.rds"
## [1] "results/gsea/X_bin_weak/linear_susie_L1_pip_summary.rds"
# gather per-simulation credible-set summaries (same layout as the PIP files)
cs_summary_files <- Sys.glob('results/gsea/*/*_cs_summary.rds')
res_cs <- purrr::map_dfr(cs_summary_files, ~dplyr::as_tibble(readRDS(.x)))

# Simulation manifest: one row per simulation (name, X, sim_hash, sim_id,
# rep, beta0, beta, q, seed). `show_col_types = FALSE` quiets readr's
# column-specification message (the message itself recommends this).
manifest <- readr::read_delim('config/gsea_sim/gsea_sim_manifest.tsv', delim='\t', show_col_types = FALSE)
## Rows: 2880 Columns: 9
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): name, X, sim_hash, sim_id
## dbl (5): rep, beta0, beta, q, seed
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep rows of `df` where the column named by the string `var` satisfies
# `comp(value, val)`, e.g. threshold_var(df, 'size', 200, `<`).
# `.data[[var]]` looks the column up by its string name inside filter();
# `val` is an ordinary in-scope value, so no unquoting is needed.
threshold_var <- function(df, var, val, comp){
  filter(df, comp(.data[[var]], val))
}

# Summarize CS coverage/power per method, restricted to CSs passing the
# threshold `comp(var, val)` (see threshold_var). Returns one row per method
# with: coverage, mean CS size, total causal count Q, total n_found, the
# threshold value, and one column per distinct n_found count (from the
# unnested table).
cs_filtered_coverage <- function(res_cs, var, val, comp){
    # total number of causal variables per method: one (method, sim_id, q)
    # row per simulation, summed over simulations
    Q_tbl <- res_cs %>%
        select(c(method, sim_id, q)) %>%
        unique() %>%
        group_by(method) %>%
        summarize(Q = sum(q))

    # coverage, and frequency of number of causal variants per CS;
    # the join key is spelled out so dplyr does not guess (and does not
    # emit the "Joining with `by = join_by(method)`" message on every call)
    # NOTE: in summarize, n_found2 must be computed before n_found is
    # overwritten by sum(n_found) -- keep the argument order.
    res_cs %>%
        left_join(Q_tbl, by = 'method') %>%
        threshold_var(var, val, comp) %>%
        mutate(val = val) %>%
        group_by(method) %>%
        summarize(coverage = mean(covered), n_found2 = list(table(n_found)), mean_size = mean(size), val = val[1], Q=Q[1], n_found = sum(n_found)) %>%
        unnest_wider(n_found2) 
}

# Side-by-side conditional coverage and power plots: for each threshold in
# `vals`, keep CSs with op(var, val) and plot coverage (left, with the
# nominal 0.95 line) and power (right) against the threshold.
make_plot <- function(res_cs, var, vals, op){
    # filter CSs based on `val` and `op`
    f <- function(val){
        res_cs %>%
            cs_filtered_coverage(var, val, op)
    }
    # compute coverage at each `val` in `vals`
    filtered_coverage <- purrr::map_dfr(vals, f)

    # plot coverage as a function of val
    a <- filtered_coverage %>%
        ggplot(aes(x = val, y = coverage, color = method)) +
        geom_path() + 
        geom_hline(yintercept = 0.95) +
        xlab(var)

    # "power": CSs containing exactly one causal variable (the `1` column
    # produced by unnest_wider) over the total causal count Q.
    # (A previous dead `power = n_found/Q` assignment that was immediately
    # overwritten has been removed; behavior is unchanged.)
    b <- filtered_coverage %>%
        mutate(power = `1`/Q) %>%
        ggplot(aes(x = val, y = power, color = method)) +
        geom_path() +
        xlab(var)

    cowplot::plot_grid(a, b)
}

Weak LD Structure

LD Matrix

# LD (correlation) structure of the weak-LD design; show the top-left corner
R_weak <- readRDS('results/gsea/X_bin_weak/ld.rds')
ggcorrplot(R_weak[1:20, 1:20])

### Simulation settings

# show unique simulation settings for SER simulations
# (distinct (X, beta0, beta) combinations from the manifest)
manifest %>%
    dplyr::filter(X == 'X_bin_weak') %>%
    select(c(X, beta0, beta)) %>%
    unique() %>%
    rmarkdown::paged_table()

Credible sets

CS size distribution

# histogram of CSs, and coverage over all simulations
# (one panel per method; default binning, so the stat_bin warning is expected)
res_cs %>%
    dplyr::filter(X == 'X_bin_weak') %>%
    ggplot(aes(x = size)) + 
    geom_histogram() +
    facet_wrap(vars(method))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

CS coverage

# marginal CS coverage per method; the `0`..`5` columns count CSs by the
# number of causal variables they contain (from table(n_found))
res_cs %>%
    dplyr::filter(X == 'X_bin_weak') %>%
    group_by(method) %>%
    summarize(coverage = mean(covered), n_found = list(table(n_found)), mean_size = mean(size)) %>%
    unnest_wider(n_found) %>%
    knitr::kable()
method coverage 0 1 2 3 4 5 mean_size
gibss_abf_L1 0.9916753 8 561 128 142 34 88 313.9199
gibss_abf_default 0.8185224 872 2569 164 620 132 448 594.2724
gibss_labf_L1 0.9771072 22 577 120 132 12 98 358.5515
gibss_labf_default 0.8747138 602 2771 236 632 90 474 608.1682
linear_susie_L1 0.9542144 44 615 114 106 18 64 266.3809
linear_susie_default 0.9125607 630 4295 180 1185 189 726 581.0319

The Laplace ABF gives lower coverage marginally over all CSs and all simulation scenarios. This is resolved by filtering out large CSs, which are largely uninformative.

# same coverage summary restricted to small CSs (size < 200) -- large CSs
# are largely uninformative and drag marginal coverage down
res_cs %>%
    dplyr::filter(X == 'X_bin_weak') %>%
    filter(size < 200) %>%
    group_by(method) %>%
    summarize(coverage = mean(covered), n_found = list(table(n_found)), mean_size = mean(size)) %>%
    unnest_wider(n_found) %>%
    knitr::kable()
method coverage 0 1 2 3 4 5 mean_size
gibss_abf_L1 0.9933665 4 413 114 42 18 12 4.820895
gibss_abf_default 0.9953134 8 1661 26 6 4 2 3.011716
gibss_labf_L1 1.0000000 NA 429 104 32 NA NA 2.647788
gibss_labf_default 0.9987738 2 1599 24 4 2 NA 3.136726
linear_susie_L1 0.9541985 30 499 98 24 4 NA 3.923664
linear_susie_default 0.9741766 69 2540 48 9 6 NA 2.500374

Conditional coverage

# filter on purity
# conditional coverage/power as the purity threshold is raised
vals <- c(seq(0.1, 0.9, by = 0.1), seq(0.91, .99, by = 0.01))
res_cs %>% 
    dplyr::filter(X == 'X_bin_weak') %>%
    make_plot('purity', vals, `>`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.

# filter on cs size
# conditional coverage/power as the maximum allowed CS size grows
vals <- seq(5, 100, by=5)
res_cs %>% 
    dplyr::filter(X == 'X_bin_weak') %>%
    make_plot('size', vals, `<`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.

# filter on the SER-level BFs
# note multiple CSs may capture the same effect causing "power" > 1
vals <- seq(0, 100, by=5)
res_cs %>% 
    dplyr::filter(X == 'X_bin_weak') %>%
    make_plot('lbf_ser', vals, `>`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.

Power vs FDP (PIP ordering)

# fdp vs power
# stat_fdp_power comes from workflow/scripts/plotting/plot_functions.R
fdp_plot <- res_pip %>%
    dplyr::filter(X == 'X_bin_weak') %>%
    unnest_longer(c(pip, causal)) %>%
    ggplot(aes(pip, causal, color=method)) + 
    stat_fdp_power(geom='path', max_fdp=1.0) +
    labs(x = 'FDP', y = 'Power')
fdp_plot

PIP Calibration

PIP calibration looks a little messy — we may need more simulations to get a clear picture. Note that for the logistic SERs the only source of error here is the approximation error of using the Laplace ABF vs Wakefield ABF. TODO: add comparison to exact computation of the BF via quadrature.

# PIP calibration: binned empirical frequency of causal vs reported PIP
# (stat_pip_calibration from plot_functions.R; red line = perfect calibration)
pip_calibration_plot <- res_pip %>%
    dplyr::filter(X == 'X_bin_weak') %>%
    unnest_longer(c(pip, causal)) %>%
    ggplot(aes(pip, causal)) +
    stat_pip_calibration() +
    geom_abline(intercept = 0, slope = 1, color='red') +
    theme_bw() +
    labs(x='PIP', y='Frequency') + 
    facet_wrap(vars(method))
pip_calibration_plot
## Warning: Removed 6 rows containing missing values (`geom_pointrange()`).

Medium LD Structure

LD Matrix

# LD structure of the medium-LD design; show the top-left corner
R_med <- readRDS('results/gsea/X_bin_med/ld.rds')
ggcorrplot(R_med[1:20, 1:20])

### Simulation settings

# show unique simulation settings for SER simulations
# (distinct (X, beta0, beta) combinations from the manifest)
manifest %>%
    dplyr::filter(X == 'X_bin_med') %>%
    select(c(X, beta0, beta)) %>%
    unique() %>%
    rmarkdown::paged_table()

Credible sets

CS size distribution

# histogram of CSs, and coverage over all simulations
# (one panel per method; default binning, so the stat_bin warning is expected)
res_cs %>%
    dplyr::filter(X == 'X_bin_med') %>%
    ggplot(aes(x = size)) + 
    geom_histogram() +
    facet_wrap(vars(method))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

CS coverage

# marginal CS coverage per method; `0`..`5` columns count CSs by the
# number of causal variables they contain
res_cs %>%
    dplyr::filter(X == 'X_bin_med') %>%
    group_by(method) %>%
    summarize(coverage = mean(covered), n_found = list(table(n_found)), mean_size = mean(size)) %>%
    unnest_wider(n_found) %>%
    knitr::kable()
method coverage 0 1 2 3 4 5 mean_size
gibss_abf_L1 0.9667014 32 582 150 108 33 56 238.0094
gibss_abf_default 0.7993757 964 2582 160 566 117 416 567.3617
gibss_labf_L1 0.9667014 32 602 132 110 18 67 273.2466
gibss_labf_default 0.8863684 546 2798 212 726 118 405 581.8418
linear_susie_L1 0.9542144 44 670 114 75 20 38 211.9542
linear_susie_default 0.9029840 699 4299 198 1195 129 685 559.6916

The Laplace ABF gives lower coverage marginally over all CSs and all simulation scenarios. This is resolved by filtering out large CSs, which are largely uninformative.

# same coverage summary restricted to small CSs (size < 200)
res_cs %>%
    dplyr::filter(X == 'X_bin_med') %>%
    filter(size < 200) %>%
    group_by(method) %>%
    summarize(coverage = mean(covered), n_found = list(table(n_found)), mean_size = mean(size)) %>%
    unnest_wider(n_found) %>%
    knitr::kable()
method coverage 0 1 2 3 4 5 mean_size
gibss_abf_L1 0.9593614 28 466 138 34 21 2 5.586357
gibss_abf_default 0.9535386 86 1726 26 8 1 4 2.776337
gibss_labf_L1 0.9785605 14 494 122 14 4 5 5.797856
gibss_labf_default 0.9886942 20 1704 32 4 2 7 2.704353
linear_susie_L1 0.9445215 40 562 98 19 2 NA 5.775312
linear_susie_default 0.9672535 93 2667 45 19 12 4 4.014789

Conditional coverage

# filter on purity
# conditional coverage/power as the purity threshold is raised
vals <- c(seq(0.1, 0.9, by = 0.1), seq(0.91, .99, by = 0.01))
res_cs %>% 
    dplyr::filter(X == 'X_bin_med') %>%
    make_plot('purity', vals, `>`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.

# filter on cs size
# conditional coverage/power as the maximum allowed CS size grows
vals <- seq(5, 100, by=5)
res_cs %>% 
    dplyr::filter(X == 'X_bin_med') %>%
    make_plot('size', vals, `<`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.

# filter on the SER-level BFs
# note multiple CSs may capture the same effect causing "power" > 1
vals <- seq(0, 100, by=5)
res_cs %>% 
    dplyr::filter(X == 'X_bin_med') %>%
    make_plot('lbf_ser', vals, `>`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.

Power vs FDP (PIP ordering)

# fdp vs power
# stat_fdp_power comes from workflow/scripts/plotting/plot_functions.R
fdp_plot <- res_pip %>%
    dplyr::filter(X == 'X_bin_med') %>%
    unnest_longer(c(pip, causal)) %>%
    ggplot(aes(pip, causal, color=method)) + 
    stat_fdp_power(geom='path', max_fdp=1.0) +
    labs(x = 'FDP', y = 'Power')
fdp_plot

PIP Calibration

PIP calibration looks a little messy — we may need more simulations to get a clear picture. Note that for the logistic SERs the only source of error here is the approximation error of using the Laplace ABF vs Wakefield ABF. TODO: add comparison to exact computation of the BF via quadrature.

# PIP calibration: binned empirical frequency of causal vs reported PIP
pip_calibration_plot <- res_pip %>%
    dplyr::filter(X == 'X_bin_med') %>%
    unnest_longer(c(pip, causal)) %>%
    ggplot(aes(pip, causal)) +
    stat_pip_calibration() +
    geom_abline(intercept = 0, slope = 1, color='red') +
    theme_bw() +
    labs(x='PIP', y='Frequency') + 
    facet_wrap(vars(method))
pip_calibration_plot
## Warning: Removed 6 rows containing missing values (`geom_pointrange()`).

Strong LD Structure

LD Matrix

# LD structure of the strong-LD design; larger corner shown (100 x 100)
R_strong <- readRDS('results/gsea/X_bin_strong/ld.rds')
ggcorrplot(R_strong[1:100, 1:100])

Simulation settings

# show unique simulation settings for SER simulations
# (distinct (X, beta0, beta) combinations from the manifest)
manifest %>%
    dplyr::filter(X == 'X_bin_strong') %>%
    select(c(X, beta0, beta)) %>%
    unique() %>%
    rmarkdown::paged_table()

Credible sets

CS size distribution

# histogram of CSs, and coverage over all simulations
# (namespace-qualified filter for consistency with the other sections)
res_cs %>%
    dplyr::filter(X == 'X_bin_strong') %>%
    ggplot(aes(x = size)) + 
    geom_histogram() +
    facet_wrap(vars(method))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

CS coverage

# marginal CS coverage per method; `0`..`5` columns count CSs by the
# number of causal variables they contain
res_cs %>%
    dplyr::filter(X == 'X_bin_strong') %>%
    group_by(method) %>%
    summarize(coverage = mean(covered), n_found = list(table(n_found)), mean_size = mean(size)) %>%
    unnest_wider(n_found) %>%
    knitr::kable()
method coverage 0 1 2 3 4 5 mean_size
gibss_abf_L1 0.9041667 138 879 210 144 24 45 202.3250
gibss_abf_default 0.8295833 1227 4053 372 837 162 549 554.0187
gibss_labf_L1 0.9312500 99 969 156 135 24 57 232.3063
gibss_labf_default 0.9362500 459 4443 402 1137 213 546 575.1846
linear_susie_L1 0.8750000 180 981 117 96 30 36 188.8500
linear_susie_default 0.9191667 582 4464 249 1167 162 576 550.1612

The Laplace ABF gives lower coverage marginally over all CSs and all simulation scenarios. This is resolved by filtering out large CSs, which are largely uninformative.

# same coverage summary restricted to small CSs (size < 200)
res_cs %>%
    dplyr::filter(X == 'X_bin_strong') %>%
    filter(size < 200) %>%
    group_by(method) %>%
    summarize(coverage = mean(covered), n_found = list(table(n_found)), mean_size = mean(size)) %>%
    unnest_wider(n_found) %>%
    knitr::kable()
method coverage 0 1 2 3 4 5 mean_size
gibss_abf_L1 0.8805556 129 708 189 48 3 3 18.255556
gibss_abf_default 0.8970134 300 2505 81 24 3 NA 10.173018
gibss_labf_L1 0.9190751 84 783 138 30 3 NA 14.283237
gibss_labf_default 0.9646409 96 2535 78 6 NA NA 8.215470
linear_susie_L1 0.8463612 171 816 102 15 9 NA 12.765499
linear_susie_default 0.8970438 303 2571 57 12 NA NA 8.835882

Conditional coverage

# filter on purity
# conditional coverage/power as the purity threshold is raised
vals <- c(seq(0.1, 0.9, by = 0.1), seq(0.91, .99, by = 0.01))
res_cs %>% 
    dplyr::filter(X == 'X_bin_strong') %>%
    make_plot('purity', vals, `>`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.

# filter on cs size
# conditional coverage/power as the maximum allowed CS size grows
vals <- seq(5, 100, by=5)
res_cs %>% 
    dplyr::filter(X == 'X_bin_strong') %>%
    make_plot('size', vals, `<`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.

# filter on the SER-level BFs
# note multiple CSs may capture the same effect causing "power" > 1
vals <- seq(0, 100, by=5)
res_cs %>% 
    dplyr::filter(X == 'X_bin_strong') %>%
    make_plot('lbf_ser', vals, `>`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.

Power vs FDP (PIP ordering)

# fdp vs power
# stat_fdp_power comes from workflow/scripts/plotting/plot_functions.R
fdp_plot <- res_pip %>%
    dplyr::filter(X == 'X_bin_strong') %>%
    unnest_longer(c(pip, causal)) %>%
    ggplot(aes(pip, causal, color=method)) + 
    stat_fdp_power(geom='path', max_fdp=1.0) +
    labs(x = 'FDP', y = 'Power')
fdp_plot

PIP Calibration

PIP calibration looks a little messy — we may need more simulations to get a clear picture. Note that for the logistic SERs the only source of error here is the approximation error of using the Laplace ABF vs Wakefield ABF. TODO: add comparison to exact computation of the BF via quadrature.

# PIP calibration: binned empirical frequency of causal vs reported PIP
pip_calibration_plot <- res_pip %>%
    dplyr::filter(X == 'X_bin_strong') %>%
    unnest_longer(c(pip, causal)) %>%
    ggplot(aes(pip, causal)) +
    stat_pip_calibration() +
    geom_abline(intercept = 0, slope = 1, color='red') +
    theme_bw() +
    labs(x='PIP', y='Frequency') + 
    facet_wrap(vars(method))
pip_calibration_plot
## Warning: Removed 6 rows containing missing values (`geom_pointrange()`).

MsigDb C2 (subset to 5k genes)

LD Matrix

# LD structure of the MsigDb C2 gene-set matrix. Renamed from the reused
# `R_strong` so this section cannot be confused with (or clobber) the
# strong-LD section's object; it is only consumed on the next line.
R_c2 <- readRDS('results/gsea/msigdb_c2_5k/ld.rds')
ggcorrplot(R_c2[1:100, 1:100])

Simulation settings

# show unique simulation settings for SER simulations
# (distinct (X, beta0, beta) combinations from the manifest)
manifest %>%
    dplyr::filter(X == 'msigdb_c2_5k') %>%
    select(c(X, beta0, beta)) %>%
    unique() %>%
    rmarkdown::paged_table()

Credible sets

CS size distribution

# histogram of CSs, and coverage over all simulations
# (namespace-qualified filter for consistency with the other sections)
res_cs %>%
    dplyr::filter(X == 'msigdb_c2_5k') %>%
    ggplot(aes(x = size)) + 
    geom_histogram() +
    facet_wrap(vars(method))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

CS coverage

# marginal CS coverage per method
# NOTE(review): the echoed pip glob above lists msigdb_c1_5k, not
# msigdb_c2_5k -- confirm the cs/pip results actually cover this X
res_cs %>%
    dplyr::filter(X == 'msigdb_c2_5k') %>%
    group_by(method) %>%
    summarize(coverage = mean(covered), n_found = list(table(n_found)), mean_size = mean(size)) %>%
    unnest_wider(n_found) %>%
    knitr::kable()
method coverage 1 3 2 mean_size
gibss_abf_L1 1 1 NA NA 1.0
gibss_abf_default 1 5 NA NA 19.6
gibss_labf_L1 1 1 NA NA 1.0
gibss_labf_default 1 4 1 NA 19.2
linear_susie_L1 1 1 NA NA 1.0
linear_susie_default 1 3 NA 2 19.8

The Laplace ABF gives lower coverage marginally over all CSs and all simulation scenarios. This is resolved by filtering out large CSs, which are largely uninformative.

# same coverage summary restricted to small CSs (size < 200)
res_cs %>%
    dplyr::filter(X == 'msigdb_c2_5k') %>%
    filter(size < 200) %>%
    group_by(method) %>%
    summarize(coverage = mean(covered), n_found = list(table(n_found)), mean_size = mean(size)) %>%
    unnest_wider(n_found) %>%
    knitr::kable()
method coverage 1 3 2 mean_size
gibss_abf_L1 1 1 NA NA 1.0
gibss_abf_default 1 5 NA NA 19.6
gibss_labf_L1 1 1 NA NA 1.0
gibss_labf_default 1 4 1 NA 19.2
linear_susie_L1 1 1 NA NA 1.0
linear_susie_default 1 3 NA 2 19.8

Conditional coverage

# filter on purity
# conditional coverage/power as the purity threshold is raised
vals <- c(seq(0.1, 0.9, by = 0.1), seq(0.91, .99, by = 0.01))
res_cs %>% 
    dplyr::filter(X == 'msigdb_c2_5k') %>%
    make_plot('purity', vals, `>`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.

# filter on cs size
# conditional coverage/power as the maximum allowed CS size grows
vals <- seq(5, 100, by=5)
res_cs %>% 
    dplyr::filter(X == 'msigdb_c2_5k') %>%
    make_plot('size', vals, `<`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.

# filter on the SER-level BFs
# note multiple CSs may capture the same effect causing "power" > 1
vals <- seq(0, 100, by=5)
res_cs %>% 
    dplyr::filter(X == 'msigdb_c2_5k') %>%
    make_plot('lbf_ser', vals, `>`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.

Power vs FDP (PIP ordering)

# fdp vs power
# NOTE(review): the echoed pip glob above did not include msigdb_c2_5k --
# verify res_pip has rows for this X, otherwise this plot is empty
fdp_plot <- res_pip %>%
    dplyr::filter(X == 'msigdb_c2_5k') %>%
    unnest_longer(c(pip, causal)) %>%
    ggplot(aes(pip, causal, color=method)) + 
    stat_fdp_power(geom='path', max_fdp=1.0) +
    labs(x = 'FDP', y = 'Power')
fdp_plot

PIP Calibration

PIP calibration looks a little messy — we may need more simulations to get a clear picture. Note that for the logistic SERs the only source of error here is the approximation error of using the Laplace ABF vs Wakefield ABF. TODO: add comparison to exact computation of the BF via quadrature.

# PIP calibration: binned empirical frequency of causal vs reported PIP
pip_calibration_plot <- res_pip %>%
    dplyr::filter(X == 'msigdb_c2_5k') %>%
    unnest_longer(c(pip, causal)) %>%
    ggplot(aes(pip, causal)) +
    stat_pip_calibration() +
    geom_abline(intercept = 0, slope = 1, color='red') +
    theme_bw() +
    labs(x='PIP', y='Frequency') + 
    facet_wrap(vars(method))
pip_calibration_plot
## Warning: Removed 1 rows containing missing values (`geom_pointrange()`).

MsigDb H

LD Matrix

# LD structure of the MsigDb Hallmark gene-set matrix. Renamed from the
# reused `R_strong` so it cannot be confused with the strong-LD section's
# object; it is only consumed on the next line.
R_h <- readRDS('results/gsea/msigdb_h/ld.rds')
ggcorrplot(R_h[1:20, 1:20])

Simulation settings

# show unique simulation settings for SER simulations
# (distinct (X, beta0, beta) combinations from the manifest)
manifest %>%
    dplyr::filter(X == 'msigdb_h') %>%
    select(c(X, beta0, beta)) %>%
    unique() %>%
    rmarkdown::paged_table()

Credible sets

CS size distribution

# histogram of CSs, and coverage over all simulations
# (namespace-qualified filter for consistency with the other sections)
res_cs %>%
    dplyr::filter(X == 'msigdb_h') %>%
    ggplot(aes(x = size)) + 
    geom_histogram() +
    facet_wrap(vars(method))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

CS coverage

# marginal CS coverage per method; `0`..`5` columns count CSs by the
# number of causal variables they contain
res_cs %>%
    dplyr::filter(X == 'msigdb_h') %>%
    group_by(method) %>%
    summarize(coverage = mean(covered), n_found = list(table(n_found)), mean_size = mean(size)) %>%
    unnest_wider(n_found) %>%
    knitr::kable()
method coverage 0 1 2 3 4 5 mean_size
gibss_abf_L1 0.9750000 12 331 60 45 9 23 13.21458
gibss_abf_default 0.8513570 356 1334 139 289 105 172 30.58246
gibss_labf_L1 0.9875000 6 334 43 49 12 36 16.64792
gibss_labf_default 0.9219207 187 1373 163 359 115 198 31.56451
linear_susie_L1 0.9687500 15 352 33 48 5 27 13.93542
linear_susie_default 0.9637500 87 1476 56 476 51 254 30.53875

The Laplace ABF gives lower coverage marginally over all CSs and all simulation scenarios. This is resolved by filtering out large CSs, which are largely uninformative.

# same coverage summary restricted to small CSs (size < 200); identical to
# the unfiltered table here since all Hallmark CSs are already small
res_cs %>%
    dplyr::filter(X == 'msigdb_h') %>%
    filter(size < 200) %>%
    group_by(method) %>%
    summarize(coverage = mean(covered), n_found = list(table(n_found)), mean_size = mean(size)) %>%
    unnest_wider(n_found) %>%
    knitr::kable()
method coverage 0 1 2 3 4 5 mean_size
gibss_abf_L1 0.9750000 12 331 60 45 9 23 13.21458
gibss_abf_default 0.8513570 356 1334 139 289 105 172 30.58246
gibss_labf_L1 0.9875000 6 334 43 49 12 36 16.64792
gibss_labf_default 0.9219207 187 1373 163 359 115 198 31.56451
linear_susie_L1 0.9687500 15 352 33 48 5 27 13.93542
linear_susie_default 0.9637500 87 1476 56 476 51 254 30.53875

Conditional coverage

# filter on purity
# conditional coverage/power as the purity threshold is raised
vals <- c(seq(0.1, 0.9, by = 0.1), seq(0.91, .99, by = 0.01))
res_cs %>% 
    dplyr::filter(X == 'msigdb_h') %>%
    make_plot('purity', vals, `>`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.

# Conditional coverage when keeping only CSs smaller than a size cutoff.
size_cutoffs <- seq(5, 100, by = 5)
res_cs %>%
    dplyr::filter(X == 'msigdb_h') %>%
    make_plot('size', size_cutoffs, `<`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.

# Conditional coverage when keeping only CSs whose SER-level log-BF exceeds a
# threshold. Note: multiple CSs may capture the same effect, so "power" > 1 is possible.
lbf_thresholds <- seq(0, 100, by = 5)
res_cs %>%
    dplyr::filter(X == 'msigdb_h') %>%
    make_plot('lbf_ser', lbf_thresholds, `>`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.

Power vs FDP (PIP ordering)

# Power vs false discovery proportion, ordering variables by PIP.
pip_long <- res_pip %>%
    dplyr::filter(X == 'msigdb_h') %>%
    tidyr::unnest_longer(c(pip, causal))
fdp_plot <- ggplot(pip_long, aes(pip, causal, color = method)) +
    stat_fdp_power(geom = 'path', max_fdp = 1.0) +
    labs(x = 'FDP', y = 'Power')
fdp_plot

PIP Calibration

PIP calibration looks a little messy — more simulations may be needed to get a clear picture. Note that for the logistic SERs the only source of error here is the approximation error of using the Laplace ABF vs the Wakefield ABF. TODO: add a comparison to exact computation of the BF via quadrature.

# PIP calibration: among variables with PIP near p, a fraction ~p should be causal.
pip_calibration_plot <- res_pip %>%
    dplyr::filter(X == 'msigdb_h') %>%
    tidyr::unnest_longer(c(pip, causal)) %>%
    ggplot(aes(pip, causal)) +
    stat_pip_calibration() +
    geom_abline(intercept = 0, slope = 1, color = 'red') +  # perfect-calibration line
    labs(x = 'PIP', y = 'Frequency') +
    theme_bw() +
    facet_wrap(vars(method))
pip_calibration_plot
## Warning: Removed 5 rows containing missing values (`geom_pointrange()`).

MsigDb C1 (subset to 5k)

LD Matrix

# LD matrix for the MsigDb C1 (5k subset) scenario; plot the top-left 20x20 corner.
# BUG FIX: this chunk previously loaded results/gsea/msigdb_h/ld.rds, a
# copy-paste from the Hallmark section above. Load the C1 matrix instead.
R_strong <- readRDS('results/gsea/msigdb_c1_5k/ld.rds')
ggcorrplot(R_strong[1:20, 1:20])

Simulation settings

# Show unique simulation settings for SER simulations in the C1 (5k) scenario.
# BUG FIX: the filter used 'msigdb_h' (copy-paste from the Hallmark section);
# it should select the scenario this section documents.
manifest %>%
    dplyr::filter(X == 'msigdb_c1_5k') %>%
    select(c(X, beta0, beta)) %>%
    unique() %>%
    rmarkdown::paged_table()

Credible sets

CS size distribution

# Histogram of CS sizes across all C1 (5k) simulations, faceted by method.
# BUG FIX: the filter used 'msigdb_h' (copy-paste from the Hallmark section).
res_cs %>%
    filter(X == 'msigdb_c1_5k') %>%
    ggplot(aes(x = size)) + 
    geom_histogram() +
    facet_wrap(vars(method))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

CS coverage

# CS coverage, CS-count distribution, and mean CS size per method, C1 (5k) scenario.
# BUG FIX: the filter used 'msigdb_h' (copy-paste from the Hallmark section),
# which is why the rendered table duplicates the Hallmark results.
res_cs %>%
    dplyr::filter(X == 'msigdb_c1_5k') %>%
    group_by(method) %>%
    summarize(coverage = mean(covered), n_found = list(table(n_found)), mean_size = mean(size)) %>%
    unnest_wider(n_found) %>%
    knitr::kable()
method coverage 0 1 2 3 4 5 mean_size
gibss_abf_L1 0.9750000 12 331 60 45 9 23 13.21458
gibss_abf_default 0.8513570 356 1334 139 289 105 172 30.58246
gibss_labf_L1 0.9875000 6 334 43 49 12 36 16.64792
gibss_labf_default 0.9219207 187 1373 163 359 115 198 31.56451
linear_susie_L1 0.9687500 15 352 33 48 5 27 13.93542
linear_susie_default 0.9637500 87 1476 56 476 51 254 30.53875

The Laplace ABF gives slightly lower coverage when aggregated over all CSs and all simulation scenarios. This is resolved by filtering out large CSs, which are largely uninformative.

# CS coverage after dropping very large (size >= 200) credible sets, C1 (5k) scenario.
# BUG FIX: the filter used 'msigdb_h' (copy-paste from the Hallmark section).
res_cs %>%
    dplyr::filter(X == 'msigdb_c1_5k') %>%
    filter(size < 200) %>%
    group_by(method) %>%
    summarize(coverage = mean(covered), n_found = list(table(n_found)), mean_size = mean(size)) %>%
    unnest_wider(n_found) %>%
    knitr::kable()
method coverage 0 1 2 3 4 5 mean_size
gibss_abf_L1 0.9750000 12 331 60 45 9 23 13.21458
gibss_abf_default 0.8513570 356 1334 139 289 105 172 30.58246
gibss_labf_L1 0.9875000 6 334 43 49 12 36 16.64792
gibss_labf_default 0.9219207 187 1373 163 359 115 198 31.56451
linear_susie_L1 0.9687500 15 352 33 48 5 27 13.93542
linear_susie_default 0.9637500 87 1476 56 476 51 254 30.53875

Conditional coverage

# Conditional coverage, filtering CSs on purity, C1 (5k) scenario.
# BUG FIX: the filter used 'msigdb_h' (copy-paste from the Hallmark section).
vals <- c(seq(0.1, 0.9, by = 0.1), seq(0.91, 0.99, by = 0.01))
res_cs %>% 
    dplyr::filter(X == 'msigdb_c1_5k') %>%
    make_plot('purity', vals, `>`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.

# Conditional coverage, filtering CSs on size, C1 (5k) scenario.
# BUG FIX: the filter used 'msigdb_h' (copy-paste from the Hallmark section).
vals <- seq(5, 100, by = 5)
res_cs %>% 
    dplyr::filter(X == 'msigdb_c1_5k') %>%
    make_plot('size', vals, `<`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.

# Conditional coverage, filtering CSs on the SER-level log-BF, C1 (5k) scenario.
# Note: multiple CSs may capture the same effect, causing "power" > 1.
# BUG FIX: the filter used 'msigdb_h' (copy-paste from the Hallmark section).
vals <- seq(0, 100, by = 5)
res_cs %>% 
    dplyr::filter(X == 'msigdb_c1_5k') %>%
    make_plot('lbf_ser', vals, `>`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.

Power vs FDP (PIP ordering)

# Power vs false discovery proportion (PIP ordering), C1 (5k) scenario.
# BUG FIX: the filter used 'msigdb_h' (copy-paste from the Hallmark section).
fdp_plot <- res_pip %>%
    dplyr::filter(X == 'msigdb_c1_5k') %>%
    unnest_longer(c(pip, causal)) %>%
    ggplot(aes(pip, causal, color = method)) + 
    stat_fdp_power(geom = 'path', max_fdp = 1.0) +
    labs(x = 'FDP', y = 'Power')
fdp_plot

PIP Calibration

PIP calibration looks a little messy — more simulations may be needed to get a clear picture. Note that for the logistic SERs the only source of error here is the approximation error of using the Laplace ABF vs the Wakefield ABF. TODO: add a comparison to exact computation of the BF via quadrature.

# PIP calibration for the C1 (5k) scenario: among variables with PIP near p,
# a fraction ~p should be causal (red line = perfect calibration).
# BUG FIX: the filter used 'msigdb_h' (copy-paste from the Hallmark section).
pip_calibration_plot <- res_pip %>%
    dplyr::filter(X == 'msigdb_c1_5k') %>%
    unnest_longer(c(pip, causal)) %>%
    ggplot(aes(pip, causal)) +
    stat_pip_calibration() +
    geom_abline(intercept = 0, slope = 1, color = 'red') +
    theme_bw() +
    labs(x = 'PIP', y = 'Frequency') + 
    facet_wrap(vars(method))
pip_calibration_plot
## Warning: Removed 5 rows containing missing values (`geom_pointrange()`).